# handle conflicts
options(conflicts.policy = "depends.ok")
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/fun_ml.R?raw=true")ℹ SHA-1 hash of file is "32a0bc8ced92c79756b56ddcdc9a06e639795da6"
tidymodels_conflictRules()In [1]:
# handle conflicts
options(conflicts.policy = "depends.ok")
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/fun_ml.R?raw=true")ℹ SHA-1 hash of file is "32a0bc8ced92c79756b56ddcdc9a06e639795da6"
tidymodels_conflictRules()In [2]:
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(tidymodels))
suppressPackageStartupMessages(library(tidyposterior))
library(kableExtra, exclude = "group_rows")
library(Rcpp, exclude = "populate")
library(brms, exclude = c("ar", "mixture"))Loading 'brms' package (version 2.22.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').
theme_set(theme_classic()) In [3]:
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/format_path.R?raw=true")ℹ SHA-1 hash of file is "de12d764438078a9341db9bc0b2472c87e0ae846"
# CHTC support functions
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/chtc/static_files/fun_chtc.R?raw=true")ℹ SHA-1 hash of file is "6112ad4934c18197bb71037f5d65b97e1fd2b039"
In [4]:
path_processed <- format_path(str_c("studydata/risk/data_processed/lag"))
path_models_lag <- format_path(str_c("studydata/risk/models/lag"))In [5]:
auroc_dem_0 <- read_csv(here::here(path_models_lag,
"test_auroc_6_x_5_1day_0_v3_nested_strat_lh_fairness.csv"),
col_types = cols()) |>
mutate(fold_num = rep(1:5, 6),
repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5),
rep(4, 5), rep(5, 5), rep(6, 5))) |>
mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |>
select(-outer_split_num)
auroc_dem_24 <- read_csv(here::here(path_models_lag,
"test_auroc_6_x_5_1day_24_v3_nested_strat_lh_fairness.csv"),
col_types = cols()) |>
mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |>
mutate(fold_num = rep(1:5, 6),
repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5),
rep(4, 5), rep(5, 5), rep(6, 5))) |>
select(-outer_split_num)
auroc_dem_72 <- read_csv(here::here(path_models_lag,
"test_auroc_6_x_5_1day_72_v3_nested_strat_lh_fairness.csv"),
col_types = cols()) |>
mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |>
mutate(fold_num = rep(1:5, 6),
repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5),
rep(4, 5), rep(5, 5), rep(6, 5))) |>
select(-outer_split_num)
auroc_dem_168 <- read_csv(here::here(path_models_lag,
"test_auroc_6_x_5_1day_168_v3_nested_strat_lh_fairness.csv"),
col_types = cols()) |>
mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |>
mutate(fold_num = rep(1:5, 6),
repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5),
rep(4, 5), rep(5, 5), rep(6, 5))) |>
select(-outer_split_num)
auroc_dem_336 <- read_csv(here::here(path_models_lag,
"test_auroc_6_x_5_1day_336_v3_nested_strat_lh_fairness.csv"),
col_types = cols()) |>
arrange(outer_split_num) |>
mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |>
mutate(fold_num = rep(1:5, 6),
repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5),
rep(4, 5), rep(5, 5), rep(6, 5))) |>
select(-outer_split_num)In [6]:
auroc_dem_all <- auroc_dem_0 |>
mutate(lag = 0) |>
bind_rows(auroc_dem_24 |>
mutate(lag = 24)) |>
bind_rows(auroc_dem_72 |>
mutate(lag = 72)) |>
bind_rows(auroc_dem_168 |>
mutate(lag = 168)) |>
bind_rows(auroc_dem_336 |>
mutate(lag = 336))
set.seed(101)In [7]:
data <- auroc_dem_all |>
select(id = fold_num, id2 = repeat_num, `not white`, `non-hispanic white` = white, lag) |>
pivot_longer(cols = c(`not white`, `non-hispanic white`), names_to = "race", values_to = "auroc") |>
mutate(race = factor(race)) |>
glimpse()Rows: 300
Columns: 5
$ id <dbl> 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1…
$ id2 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3…
$ lag <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ race <fct> not white, non-hispanic white, not white, non-hispanic white, no…
$ auroc <dbl> 0.8624530, 0.9032410, 0.8416644, 0.9328614, 0.9510940, 0.9253825…
Set priors to perf_mod() defaults
In [8]:
priors <- c(
prior(normal(2, 1.1), class = "Intercept"),
prior(normal(0, 2.79), class = "b"),
prior(exponential(2.2), class = "sigma")
)In [9]:
model_race <- brm(
formula = auroc ~ 1 + race + lag + race*lag + (1 | id2/id), # folds nested in repeats
data = subset(data, !is.na(auroc)),
family = gaussian(link = "logit"), # normal distribution w/auroc bounded between 0 and 1
chains = 4,
prior = priors,
control = list(adapt_delta = 0.99),
iter = 6000,
thin = 10,
seed = 123
)Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 13.1.6 (clang-1316.0.21.2.5)’
using SDK: ‘MacOSX12.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 7.7e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.77 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 1: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 1: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 1: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 1: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 1: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 1: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 1: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 1: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 1: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 1: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 1: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 15.169 seconds (Warm-up)
Chain 1: 11.724 seconds (Sampling)
Chain 1: 26.893 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 2.7e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 2: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 2: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 2: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 2: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 2: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 2: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 2: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 2: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 2: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 2: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 2: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 15.811 seconds (Warm-up)
Chain 2: 10.441 seconds (Sampling)
Chain 2: 26.252 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 2.3e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 3: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 3: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 3: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 3: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 3: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 3: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 3: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 3: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 3: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 3: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 3: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 15.195 seconds (Warm-up)
Chain 3: 7.318 seconds (Sampling)
Chain 3: 22.513 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 2.3e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 4: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 4: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 4: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 4: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 4: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 4: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 4: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 4: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 4: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 4: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 4: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 15.704 seconds (Warm-up)
Chain 4: 9.199 seconds (Sampling)
Chain 4: 24.903 seconds (Total)
Chain 4:
In [10]:
summary(model_race) Family: gaussian
Links: mu = logit; sigma = identity
Formula: auroc ~ 1 + race + lag + race * lag + (1 | id2/id)
Data: subset(data, !is.na(auroc)) (Number of observations: 293)
Draws: 4 chains, each with iter = 6000; warmup = 3000; thin = 10;
total post-warmup draws = 1200
Multilevel Hyperparameters:
~id2 (Number of levels: 6)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.33 0.31 0.01 1.15 1.01 907 1175
~id2:id (Number of levels: 30)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 1.20 0.20 0.88 1.66 1.00 1048 1173
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 4.06 0.43 3.26 4.96 1.00 1050 1171
racenotwhite -2.82 0.33 -3.53 -2.24 1.00 1155 1139
lag -0.00 0.00 -0.00 0.00 1.00 1316 1112
racenotwhite:lag 0.00 0.00 -0.00 0.00 1.00 1319 1112
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.10 0.00 0.09 0.11 1.00 1315 1172
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
In [11]:
pp_race <- summary(model_race)$fixed |>
as_tibble(rownames = "coef") |>
select(coef,
pp_mean = Estimate,
pp_lower = `l-95% CI`,
pp_upper = `u-95% CI`) plot posterior distribution for race effect
In [12]:
as.matrix(model_race, variable = "b_racenotwhite") |>
as_tibble() |>
ggplot(aes(x = b_racenotwhite)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
geom_segment(mapping = aes(y = 250, yend = 300, x = pp_mean, xend = pp_mean),
data = subset(pp_race, coef == "racenotwhite")) +
geom_segment(mapping = aes(y = 275, yend = 275, x = pp_lower, xend = pp_upper),
data = subset(pp_race, coef == "racenotwhite")) +
geom_vline(xintercept = 0, linetype = "dashed") plot posterior distribution for interaction effect
In [13]:
as.matrix(model_race, variable = "b_racenotwhite:lag") |>
ggplot(aes(x = `b_racenotwhite:lag`)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
geom_segment(mapping = aes(y = 100, yend = 150, x = pp_mean, xend = pp_mean),
data = subset(pp_race, coef == "racenotwhite:lag")) +
geom_segment(mapping = aes(y = 125, yend = 125, x = pp_lower, xend = pp_upper),
data = subset(pp_race, coef == "racenotwhite:lag")) +
geom_vline(xintercept = 0, linetype = "dashed") Check convergence diagnostics
In [14]:
bayesplot::mcmc_trace(model_race, pars = c("b_Intercept", "b_racenotwhite", "b_lag", "b_racenotwhite:lag"))bayesplot::mcmc_acf(model_race, pars = c("b_Intercept", "b_racenotwhite", "b_lag", "b_racenotwhite:lag"))bayesplot::mcmc_dens(model_race, pars = c("b_Intercept", "b_racenotwhite", "b_lag", "b_racenotwhite:lag"))Check posteriors
In [16]:
data <- auroc_dem_all |>
select(id = fold_num, id2 = repeat_num, female, male, lag) |>
pivot_longer(cols = c(female, male), names_to = "sex", values_to = "auroc") |>
mutate(sex = factor(sex, levels = c("male", "female"))) |>
glimpse()Rows: 300
Columns: 5
$ id <dbl> 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1…
$ id2 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3…
$ lag <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ sex <fct> female, male, female, male, female, male, female, male, female, …
$ auroc <dbl> 0.9245708, 0.8477054, 0.8592244, 0.9544339, 0.9019895, 0.9756489…
model_sex <-brm(
formula = auroc ~ 1 + sex + lag + sex*lag + (1 | id2/id), # folds nested in repeats
data = subset(data, !is.na(auroc)),
family = gaussian(link = "logit"), # normal distribution w/auroc bounded between 0 and 1
chains = 4,
prior = priors,
control = list(adapt_delta = 0.99),
iter = 6000,
thin = 10,
seed = 123
)Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 13.1.6 (clang-1316.0.21.2.5)’
using SDK: ‘MacOSX12.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000259 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.59 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 1: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 1: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 1: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 1: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 1: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 1: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 1: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 1: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 1: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 1: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 1: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 26.822 seconds (Warm-up)
Chain 1: 10.119 seconds (Sampling)
Chain 1: 36.941 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 2.4e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 2: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 2: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 2: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 2: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 2: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 2: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 2: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 2: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 2: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 2: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 2: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 26.307 seconds (Warm-up)
Chain 2: 9.913 seconds (Sampling)
Chain 2: 36.22 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 2.8e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 3: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 3: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 3: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 3: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 3: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 3: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 3: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 3: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 3: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 3: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 3: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 25.575 seconds (Warm-up)
Chain 3: 13.654 seconds (Sampling)
Chain 3: 39.229 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 2.3e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 4: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 4: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 4: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 4: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 4: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 4: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 4: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 4: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 4: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 4: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 4: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 26.281 seconds (Warm-up)
Chain 4: 10.768 seconds (Sampling)
Chain 4: 37.049 seconds (Total)
Chain 4:
In [17]:
summary(model_sex) Family: gaussian
Links: mu = logit; sigma = identity
Formula: auroc ~ 1 + sex + lag + sex * lag + (1 | id2/id)
Data: subset(data, !is.na(auroc)) (Number of observations: 300)
Draws: 4 chains, each with iter = 6000; warmup = 3000; thin = 10;
total post-warmup draws = 1200
Multilevel Hyperparameters:
~id2 (Number of levels: 6)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.08 0.10 0.00 0.30 1.00 1060 1127
~id2:id (Number of levels: 30)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.34 0.05 0.25 0.45 1.00 1085 1309
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 2.42 0.09 2.24 2.60 1.00 1011 986
sexfemale -0.52 0.06 -0.64 -0.41 1.00 1294 1257
lag -0.00 0.00 -0.00 -0.00 1.00 1288 1160
sexfemale:lag -0.00 0.00 -0.00 -0.00 1.00 1295 1160
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.03 0.00 0.03 0.04 1.00 1128 1129
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
In [18]:
pp_sex <- summary(model_sex)$fixed |>
as_tibble(rownames = "coef") |>
select(coef,
pp_mean = Estimate,
pp_lower = `l-95% CI`,
pp_upper = `u-95% CI`) plot posterior distribution for sex effect
In [19]:
as.matrix(model_sex, variable = "b_sexfemale") |>
as_tibble() |>
ggplot(aes(x = b_sexfemale)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
geom_segment(mapping = aes(y = 300, yend = 350, x = pp_mean, xend = pp_mean),
data = subset(pp_sex, coef == "sexfemale")) +
geom_segment(mapping = aes(y = 325, yend = 325, x = pp_lower, xend = pp_upper),
data = subset(pp_sex, coef == "sexfemale")) +
geom_vline(xintercept = 0, linetype = "dashed") plot posterior distribution for interaction effect
In [20]:
as.matrix(model_sex, variable = "b_sexfemale:lag") |>
ggplot(aes(x = `b_sexfemale:lag`)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
geom_segment(mapping = aes(y = 100, yend = 150, x = pp_mean, xend = pp_mean),
data = subset(pp_sex, coef == "sexfemale:lag")) +
geom_segment(mapping = aes(y = 125, yend = 125, x = pp_lower, xend = pp_upper),
data = subset(pp_sex, coef == "sexfemale:lag")) +
geom_vline(xintercept = 0, linetype = "dashed") In [21]:
Check posteriors
In [23]:
data <- auroc_dem_all |>
select(id = fold_num, id2 = repeat_num, `above poverty`, `below poverty`, lag) |>
pivot_longer(cols = c(`above poverty`, `below poverty`), names_to = "income",
values_to = "auroc") |>
mutate(income = factor(income)) |>
glimpse()Rows: 300
Columns: 5
$ id <dbl> 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, …
$ id2 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, …
$ lag <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ income <fct> above poverty, below poverty, above poverty, below poverty, abo…
$ auroc <dbl> 0.8902036, 0.9093397, 0.9092639, 0.9241711, 0.9392897, 0.902794…
model_income <- brm(
formula = auroc ~ 1 + income + lag + income*lag + (1 | id2/id), # folds nested in repeats
data = subset(data, !is.na(auroc)),
family = gaussian(link = "logit"), # normal distribution w/auroc bounded between 0 and 1
chains = 4,
prior = priors,
control = list(adapt_delta = 0.999),
iter = 6000,
thin = 10,
seed = 123
)Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 13.1.6 (clang-1316.0.21.2.5)’
using SDK: ‘MacOSX12.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -I/opt/R/arm64/include -fPIC -falign-functions=64 -Wall -g -O2 -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 7.8e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.78 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 1: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 1: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 1: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 1: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 1: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 1: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 1: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 1: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 1: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 1: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 1: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 32.957 seconds (Warm-up)
Chain 1: 17.609 seconds (Sampling)
Chain 1: 50.566 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 2.6e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.26 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 2: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 2: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 2: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 2: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 2: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 2: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 2: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 2: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 2: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 2: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 2: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 35.192 seconds (Warm-up)
Chain 2: 19.175 seconds (Sampling)
Chain 2: 54.367 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 2.6e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.26 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 3: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 3: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 3: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 3: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 3: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 3: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 3: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 3: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 3: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 3: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 3: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 39.954 seconds (Warm-up)
Chain 3: 37.112 seconds (Sampling)
Chain 3: 77.066 seconds (Total)
Chain 3:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 2.7e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 4: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 4: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 4: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 4: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 4: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 4: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 4: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 4: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 4: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 4: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 4: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 34.666 seconds (Warm-up)
Chain 4: 9.797 seconds (Sampling)
Chain 4: 44.463 seconds (Total)
Chain 4:
Warning: There were 18 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: Examine the pairs() plot to diagnose sampling problems
In [24]:
summary(model_income) Family: gaussian
Links: mu = logit; sigma = identity
Formula: auroc ~ 1 + income + lag + income * lag + (1 | id2/id)
Data: subset(data, !is.na(auroc)) (Number of observations: 300)
Draws: 4 chains, each with iter = 6000; warmup = 3000; thin = 10;
total post-warmup draws = 1200
Multilevel Hyperparameters:
~id2 (Number of levels: 6)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.12 0.12 0.00 0.47 1.00 1105 1154
~id2:id (Number of levels: 30)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.48 0.08 0.35 0.65 1.00 1245 1182
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
Intercept 2.27 0.12 2.03 2.52 1.00 1119
incomebelowpoverty -0.37 0.07 -0.50 -0.24 1.00 1075
lag -0.00 0.00 -0.00 -0.00 1.00 1086
incomebelowpoverty:lag -0.00 0.00 -0.00 0.00 1.00 1220
Tail_ESS
Intercept 1151
incomebelowpoverty 1171
lag 1131
incomebelowpoverty:lag 1052
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 0.04 0.00 0.04 0.05 1.00 1354 1218
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
In [25]:
pp_income <- summary(model_income)$fixed |>
as_tibble(rownames = "coef") |>
select(coef,
pp_mean = Estimate,
pp_lower = `l-95% CI`,
pp_upper = `u-95% CI`) plot posterior distribution for income effect
In [26]:
as.matrix(model_income, variable = "b_incomebelowpoverty") |>
as_tibble() |>
ggplot(aes(x = b_incomebelowpoverty)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
geom_segment(mapping = aes(y = 225, yend = 275, x = pp_mean, xend = pp_mean),
data = subset(pp_income, coef == "incomebelowpoverty")) +
geom_segment(mapping = aes(y = 250, yend = 250, x = pp_lower, xend = pp_upper),
data = subset(pp_income, coef == "incomebelowpoverty")) +
geom_vline(xintercept = 0, linetype = "dashed") plot posterior distribution for interaction effect
In [27]:
as.matrix(model_income, variable = "b_incomebelowpoverty:lag") |>
ggplot(aes(x = `b_incomebelowpoverty:lag`)) +
geom_histogram(fill = "grey", color = "black", bins = 30) +
geom_segment(mapping = aes(y = 125, yend = 175, x = pp_mean, xend = pp_mean),
data = subset(pp_income, coef == "incomebelowpoverty:lag")) +
geom_segment(mapping = aes(y = 150, yend = 150, x = pp_lower, xend = pp_upper),
data = subset(pp_income, coef == "incomebelowpoverty:lag")) +
geom_vline(xintercept = 0, linetype = "dashed") In [28]:
bayesplot::mcmc_trace(model_income, pars = c("b_Intercept", "b_incomebelowpoverty", "b_lag", "b_incomebelowpoverty:lag"))bayesplot::mcmc_acf(model_income, pars = c("b_Intercept", "b_incomebelowpoverty", "b_lag", "b_incomebelowpoverty:lag"))bayesplot::mcmc_dens(model_income, pars = c("b_Intercept", "b_incomebelowpoverty", "b_lag", "b_incomebelowpoverty:lag"))Check posteriors